{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ch7.6 AODE\n",
    "试编程实现 AODE 分类器，并以西瓜数据集 3.0 为训练集，对 p.151的“测 1” 样本进行判别。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "class AODE():\n",
    "    def __init__(self, num_classes=2,m=1):\n",
    "        self.num_classes = num_classes\n",
    "        self.m = m\n",
    "        self.p_c_x = np.zeros(self.num_classes)\n",
    "        \n",
    "    def predict(self, dataset, test_data):\n",
    "        self.d = dataset.shape[1]\n",
    "        self.N = np.zeros(self.d)\n",
    "        column_name = dataset.columns\n",
    "        for i in range(self.d):\n",
    "            self.N[i] = len(np.unique(dataset.ix[:,i]))\n",
    "        for c in range(self.num_classes):\n",
    "            for i in range(self.d):\n",
    "                xi = test_data[i]\n",
    "                D_xi = dataset[dataset[column_name[i]]==xi]\n",
    "                if len(D_xi) >= self.m:\n",
    "                    D_c_xi = D_xi[D_xi['label']==c]\n",
    "                    p_c_xi = (len(D_c_xi)+1)/(len(dataset)+self.num_classes*self.N[i])\n",
    "                    p_xi_xj = 1\n",
    "                    for j in range(self.d):\n",
    "                        xj = test_data[j]\n",
    "                        D_c_xi_xj = D_c_xi[D_c_xi[column_name[j]]==xj]\n",
    "                        p_xi_xj *= (len(D_c_xi_xj)+1)/(len(D_c_xi)+self.N[j])\n",
    "                    self.p_c_x[c] += p_c_xi*p_xi_xj\n",
    "        "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dataset = pd.read_csv('data/table_4_3_watermelon_3_0_num.csv')\n",
    "dataset = dataset.drop('Idx',axis=1)\n",
    "dataset = dataset.drop('density', axis=1)\n",
    "dataset = dataset.drop('sugar_ratio', axis=1)\n",
    "test_data = dataset.ix[0,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "aode = AODE()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "aode.predict(dataset, test_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.00058974,  0.08711895])"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aode.p_c_x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
